# config.py
"""
Stores configuration parameters for the dynamic allocation simulation.
Revised for higher average costs and mu discretization.
"""

import numpy as np

# --- Simulation Parameters ---
# Problem size.
T = 1_000  # rounds
K = 3  # agents

# Q-learning hyperparameters.
NUM_EPISODES = 1_000  # learning episodes
ALPHA = 0.1  # learning rate
GAMMA = 0.9  # agents' discount factor

# Exploration-rate schedule (the decay rate is applied per episode).
EPSILON_START = 1.0  # initial exploration rate
EPSILON_END = 0.01  # final exploration rate
EPSILON_DECAY = 0.995  # decay rate

# --- Agent Configuration ---
# Discretization granularity.
NUM_VALUE_BINS = 10  # bins used to discretize the private value v
NUM_REPORT_ACTIONS = 11  # discrete report actions (e.g., 0.0, 0.1, ..., 1.0)

# Overall ranges, each given as a 2-tuple.
VALUE_RANGE = (0, 1)
REPORT_RANGE = (0, 1)  # range of reports

# Ends of the private-value range, stored as three-element lists.
# NOTE(review): the length equals K = 3 -- presumably one entry per agent; confirm
# and keep in sync if K changes.
MIN_VALUE_RANGE = [0] * 3
MAX_VALUE_RANGE = [1] * 3

# --- Planner Configuration ---
# Planner's target average cost per round, for each resource dimension.
RHO = 0.5
# Dimension of the cost vector; 1 corresponds to a single resource type.
COST_DIM = 1
# COST_DIM = 2  # multi-dimensional alternative, currently disabled

# --- Item Consumption Configuration ---
# Minimum and maximum item consumption factors, stored as three-element lists.
# NOTE(review): the length equals K = 3 -- presumably one entry per agent; confirm.
MIN_ITEM_CONSUMPTION_FACTOR = [0.7] * 3
MAX_ITEM_CONSUMPTION_FACTOR = [1.3] * 3
# --- Mu (Dual Variable) Discretization Configuration (NEW) ---
# Lets agents learn based on the planner's dual variable (mu).
# mu is assumed to be scalar (COST_DIM = 1); COST_DIM > 1 needs more complex handling.
#
# Range heuristic: Balseiro et al., Proposition 2, gives mu_j <= f_bar / rho_j + 1.
# With f_bar = 1 (max value) and rho_j approx. RHO = 0.5, that bound is 1 / 0.5 + 1 = 3.
# NOTE(review): the range actually used below is (0, 1 / RHO), i.e. (0, 2.0) while
# RHO = 0.5. That is narrower than the bound of 3 above and than the wider
# "0 to 5" safety range this comment used to suggest. Confirm the intended upper limit.
MU_RANGE = (0, 1 / RHO)  # expected range of the dual variable mu (for COST_DIM = 1)
NUM_MU_BINS = 10  # number of bins for discretizing mu

# --- Plotting Configuration ---
# Window size of the moving average used for plots.
PLOT_WINDOW_SIZE = 50
